{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "PyixKxWbzmaJ"
   },
   "source": [
    "# Tutorial - Data & ML Monitoring"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "yiyAqrYLsD-9",
    "outputId": "61977252-0854-40f5-b364-9d1211f2d58c"
   },
   "outputs": [],
   "source": [
    "# !pip install evidently"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "57YUIGDKsVBH"
   },
   "source": [
    "# Import Libraries"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "eEvG5j8FsUYF"
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import datetime\n",
    "from sklearn import datasets"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "ByQyBVCTvplp"
   },
   "outputs": [],
   "source": [
    "from evidently.ui.workspace.cloud import CloudWorkspace\n",
    "\n",
    "from evidently.report import Report\n",
    "\n",
    "from evidently import metrics\n",
    "from evidently.metric_preset import DataQualityPreset\n",
    "from evidently.metric_preset import DataDriftPreset\n",
    "\n",
    "from evidently.test_suite import TestSuite\n",
    "from evidently.tests import *\n",
    "from evidently.test_preset import DataDriftTestPreset\n",
    "from evidently.tests.base_test import TestResult, TestStatus"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "zXnoG9PEwD6F"
   },
   "outputs": [],
   "source": [
    "from evidently.ui.dashboards import DashboardPanelPlot\n",
    "from evidently.ui.dashboards import DashboardPanelTestSuite\n",
    "from evidently.ui.dashboards import PanelValue\n",
    "from evidently.ui.dashboards import PlotType\n",
    "from evidently.ui.dashboards import ReportFilter\n",
    "from evidently.ui.dashboards import TestFilter\n",
    "from evidently.ui.dashboards import TestSuitePanelType\n",
    "from evidently.renderers.html_widgets import WidgetSize"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "U2FK0gsNwa74"
   },
   "source": [
    "# Load Data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "b_-KKirtwb9l",
    "outputId": "a6cca530-9106-4e7f-ed9b-004fd70857db"
   },
   "outputs": [],
   "source": [
    "# Fetch the OpenML \"adult\" dataset (version 2) and keep its DataFrame.\n",
    "adult_data = datasets.fetch_openml(name=\"adult\", version=2, as_frame=\"auto\")\n",
    "adult = adult_data.frame\n",
    "\n",
    "# Rows with these education levels form the current (\"production\") data;\n",
    "# every other row goes into the reference data.\n",
    "prod_education_levels = [\"Some-college\", \"HS-grad\", \"Bachelors\"]\n",
    "in_prod = adult[\"education\"].isin(prod_education_levels)\n",
    "\n",
    "adult_prod = adult[in_prod]\n",
    "adult_ref = adult[~in_prod]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "pDZBKKSs1K0W"
   },
   "source": [
    "# Connect to Evidently Cloud"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "qOz5EDYV1KGm"
   },
   "outputs": [],
   "source": [
    "# Replace the placeholder below with your own Evidently Cloud API token.\n",
    "ws = CloudWorkspace(\n",
    "    token=\"YOUR TOKEN HERE\",\n",
    "    url=\"https://app.evidently.cloud\",\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Create a Team"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Whatever the call returns is displayed as this cell's output.\n",
    "ws.create_team(\n",
    "    \"My team name\",\n",
    "    org_id=\"YOUR ORG ID HERE\",\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "UailwyQW1Vu8"
   },
   "source": [
    "# Create a Project"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "_vq_aBn_1n2w",
    "outputId": "2e461089-d985-4122-cf2e-3d51f7d91817"
   },
   "outputs": [],
   "source": [
    "# Create the project for the given team, then set its description and save it.\n",
    "project = ws.create_project(\n",
    "    \"My project name\",\n",
    "    team_id=\"YOUR TEAM ID HERE\",\n",
    ")\n",
    "project.description = \"My project description\"\n",
    "project.save()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "Hh_pKDNLIk3L"
   },
   "source": [
    "# Compute snapshots"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "U1K4xL2FGQHD"
   },
   "outputs": [],
   "source": [
    "# Build a report that combines the data drift and data quality presets,\n",
    "# timestamped with the current time.\n",
    "data_report = Report(\n",
    "    metrics=[\n",
    "        # PSI drift detection; the threshold is passed as a float rather than\n",
    "        # the string '0.3', matching the float used for DataDriftTestPreset.\n",
    "        DataDriftPreset(stattest=\"psi\", stattest_threshold=0.3),\n",
    "        DataQualityPreset(),\n",
    "    ],\n",
    "    timestamp=datetime.datetime.now(),\n",
    ")\n",
    "\n",
    "# Compare the first 100 production rows against the reference data.\n",
    "data_report.run(reference_data=adult_ref, current_data=adult_prod.iloc[0:100, :])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 1000
    },
    "id": "b-qBb-ZAMznl",
    "outputId": "23bc14c9-5ac5-43d4-d4e6-dae926a9c97c",
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "data_report"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "FDWUqyguGgdW"
   },
   "outputs": [],
   "source": [
    "# Upload the computed report to the project.\n",
    "ws.add_report(\n",
    "    project.id,\n",
    "    data_report,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Upload the report together with its datasets (include_data=True).\n",
    "ws.add_report(\n",
    "    project.id,\n",
    "    data_report,\n",
    "    include_data=True,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Download a dataset from the project by its ID and preview the first rows.\n",
    "downloaded_data_from_the_project = ws.load_dataset(dataset_id=\"YOUR_DATASET_ID\")\n",
    "downloaded_data_from_the_project.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Upload rows 100-199 of the production data as a named dataset.\n",
    "# A column mapping can also be specified when uploading data to a project.\n",
    "ws.add_dataset(\n",
    "    adult_prod.iloc[100:200, :],\n",
    "    name=\"dataset_uploaded_to_the_project\",\n",
    "    project_id=project.id,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "Irk_GvJqIj8q"
   },
   "outputs": [],
   "source": [
    "def create_report(i: int) -> Report:\n",
    "    \"\"\"Build and run a data drift + data quality report for the i-th batch.\n",
    "\n",
    "    Args:\n",
    "        i: Batch index. Selects rows ``100 * i`` up to ``100 * (i + 1)`` of\n",
    "            ``adult_prod`` as current data and shifts the report timestamp\n",
    "            ``i`` days forward from the current time.\n",
    "\n",
    "    Returns:\n",
    "        The ``Report`` after it has been run against ``adult_ref``.\n",
    "    \"\"\"\n",
    "    current_batch = adult_prod.iloc[100 * i : 100 * (i + 1), :]\n",
    "\n",
    "    data_report = Report(\n",
    "        metrics=[\n",
    "            # PSI drift detection; the threshold is passed as a float rather\n",
    "            # than the string '0.3'.\n",
    "            DataDriftPreset(stattest=\"psi\", stattest_threshold=0.3),\n",
    "            DataQualityPreset(),\n",
    "        ],\n",
    "        timestamp=datetime.datetime.now() + datetime.timedelta(days=i),\n",
    "    )\n",
    "\n",
    "    data_report.run(reference_data=adult_ref, current_data=current_batch)\n",
    "    return data_report"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "cIohJzllKMtv",
    "outputId": "332dbae7-9cc0-4140-8582-f0b8c05c0cbb"
   },
   "outputs": [],
   "source": [
    "# Create and upload one report per batch, for batch indices 1 through 9.\n",
    "for batch_index in range(1, 10):\n",
    "    batch_report = create_report(i=batch_index)\n",
    "    ws.add_report(project.id, batch_report)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "oMoXmYy9YYL3"
   },
   "source": [
    "# Add custom panels"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "XBtMTEacCZhk"
   },
   "outputs": [],
   "source": [
    "# Fetch a project by its ID; this rebinds `project` for the cells below.\n",
    "project_id = \"YOUR PROJECT ID HERE\"\n",
    "project = ws.get_project(project_id)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "luu631TAYaJj",
    "outputId": "17da3228-30ca-4a9a-a68a-4ca74c23f572"
   },
   "outputs": [],
   "source": [
    "# Line plot of `current.number_of_rows` taken from DatasetSummaryMetric.\n",
    "inference_count_panel = DashboardPanelPlot(\n",
    "    title=\"Daily inference Count\",\n",
    "    filter=ReportFilter(metadata_values={}, tag_values=[]),\n",
    "    values=[\n",
    "        PanelValue(\n",
    "            metric_id=\"DatasetSummaryMetric\",\n",
    "            field_path=metrics.DatasetSummaryMetric.fields.current.number_of_rows,\n",
    "            legend=\"count\",\n",
    "        ),\n",
    "    ],\n",
    "    plot_type=PlotType.LINE,\n",
    "    size=WidgetSize.FULL,\n",
    ")\n",
    "\n",
    "# Line plot of `share_of_drifted_columns` taken from DatasetDriftMetric.\n",
    "drift_share_panel = DashboardPanelPlot(\n",
    "    title=\"Share of drifting features (PSI > 0.3)\",\n",
    "    filter=ReportFilter(metadata_values={}, tag_values=[]),\n",
    "    values=[\n",
    "        PanelValue(\n",
    "            metric_id=\"DatasetDriftMetric\",\n",
    "            field_path=\"share_of_drifted_columns\",\n",
    "            legend=\"share\",\n",
    "        ),\n",
    "    ],\n",
    "    plot_type=PlotType.LINE,\n",
    "    size=WidgetSize.FULL,\n",
    ")\n",
    "\n",
    "# Add both panels to the \"Summary\" tab, then save the project.\n",
    "for summary_panel in (inference_count_panel, drift_share_panel):\n",
    "    project.dashboard.add_panel(summary_panel, tab=\"Summary\")\n",
    "project.save()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "cY-Y4yXabWtW"
   },
   "source": [
    "# Run tests"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "eFpQcePVbX4a"
   },
   "outputs": [],
   "source": [
    "def create_tests(i: int):\n",
    "    \"\"\"Build and run the drift and data quality test suite for the i-th batch.\n",
    "\n",
    "    Args:\n",
    "        i: Batch index. Selects rows ``100 * i`` up to ``100 * (i + 1)`` of\n",
    "            ``adult_prod`` as current data and shifts the suite timestamp\n",
    "            ``i`` days forward from the current time.\n",
    "\n",
    "    Returns:\n",
    "        The ``TestSuite`` after it has been run against ``adult_ref``.\n",
    "    \"\"\"\n",
    "    # NOTE(review): no `stattest` is passed to DataDriftTestPreset, whereas the\n",
    "    # reports in this notebook use stattest='psi' - confirm this is intended.\n",
    "    checks = [\n",
    "        DataDriftTestPreset(stattest_threshold=0.3),\n",
    "        TestShareOfMissingValues(lte=0.05),\n",
    "        TestNumberOfConstantColumns(eq=0),\n",
    "        TestNumberOfEmptyRows(eq=0),\n",
    "        TestNumberOfEmptyColumns(eq=0),\n",
    "        TestNumberOfDuplicatedColumns(eq=0),\n",
    "    ]\n",
    "    suite_timestamp = datetime.datetime.now() + datetime.timedelta(days=i)\n",
    "    current_batch = adult_prod.iloc[100 * i : 100 * (i + 1), :]\n",
    "\n",
    "    suite = TestSuite(tests=checks, timestamp=suite_timestamp)\n",
    "    suite.run(reference_data=adult_ref, current_data=current_batch)\n",
    "    return suite"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "gu1S0f1AbmOP",
    "outputId": "42355c2b-e3bf-45ce-f763-ecb2afb095ab"
   },
   "outputs": [],
   "source": [
    "# Run and upload one test suite per batch, for batch indices 0 through 9.\n",
    "for batch_index in range(0, 10):\n",
    "    batch_suite = create_tests(i=batch_index)\n",
    "    ws.add_test_suite(project.id, batch_suite)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "nzzWqNY_b2-K",
    "outputId": "ea117565-423c-461e-9c02-f8cbb1278430"
   },
   "outputs": [],
   "source": [
    "# Test IDs shown in the data quality panel, in display order.\n",
    "quality_test_ids = [\n",
    "    \"TestNumberOfConstantColumns\",\n",
    "    \"TestShareOfMissingValues\",\n",
    "    \"TestNumberOfEmptyRows\",\n",
    "    \"TestNumberOfEmptyColumns\",\n",
    "    \"TestNumberOfDuplicatedColumns\",\n",
    "]\n",
    "\n",
    "quality_tests_panel = DashboardPanelTestSuite(\n",
    "    title=\"Data quality tests\",\n",
    "    test_filters=[\n",
    "        TestFilter(test_id=test_id, test_args={}) for test_id in quality_test_ids\n",
    "    ],\n",
    "    filter=ReportFilter(metadata_values={}, tag_values=[], include_test_suites=True),\n",
    "    size=WidgetSize.FULL,\n",
    "    panel_type=TestSuitePanelType.DETAILED,\n",
    "    time_agg=\"1D\",\n",
    ")\n",
    "\n",
    "column_drift_panel = DashboardPanelTestSuite(\n",
    "    title=\"Data drift per column in time\",\n",
    "    test_filters=[TestFilter(test_id=\"TestColumnDrift\", test_args={})],\n",
    "    filter=ReportFilter(metadata_values={}, tag_values=[], include_test_suites=True),\n",
    "    size=WidgetSize.FULL,\n",
    "    panel_type=TestSuitePanelType.DETAILED,\n",
    "    time_agg=\"1D\",\n",
    ")\n",
    "\n",
    "# Add both panels to the \"Data Tests\" tab, then save the project.\n",
    "for tests_panel in (quality_tests_panel, column_drift_panel):\n",
    "    project.dashboard.add_panel(tests_panel, tab=\"Data Tests\")\n",
    "project.save()"
   ]
  }
 ],
 "metadata": {
  "colab": {
   "provenance": []
  },
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
